# NOTE: run this script in a fresh R session. It deliberately does not call
# rm(list = ls()): every object used below is defined in this file, and wiping
# the caller's global environment is a destructive side effect.

library(tidyverse)

## The prepdata() function pasted below is taken from the rep_plots.r file in
## the Hainmueller & Hopkins (2015) replication folder.

### Link to repository:
### https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi:10.7910/DVN/25505

#' Prepare conjoint estimates for plotting
#'
#' Turns a two-column table of estimates into a plotting table: adds
#' confidence bounds (pe +/- 1.96 * se), assigns every row to an attribute
#' group based on its row name, sorts the groups into display order, replaces
#' the row-name codes with readable labels, and interleaves header and blank
#' spacer rows between the groups.
#'
#' @param d A data frame with exactly two columns (point estimate, then
#'   standard error) whose row names are attribute-level codes such as
#'   "1b.FeatGender" or "3.FeatCountry". All 50 levels listed in the function
#'   body must be present. Groups may appear in any order, but rows inside a
#'   group are labelled positionally, so they must be in the same order as
#'   that group's labels.
#' @return A data frame with columns pe, se, var, order, upper, lower, group,
#'   sorted by `order`. Header/spacer rows have group NA and the placeholder
#'   value 1 in pe, se, upper and lower. `var` is a factor whose levels are
#'   the labels in reverse row order.
prepdata <- function(d) {
  stopifnot(is.data.frame(d), ncol(d) == 2L)

  # Attribute groups in display order. `keys` are the row names belonging to
  # the group; `labels` are the readable names, matched to rows by position.
  attribute_specs <- list(
    "Gender" = list(
      keys = paste0(c("1b", 2), ".FeatGender"),
      labels = c("female", "male")
    ),
    "Education" = list(
      keys = paste0(c("1b", 2:7), ".FeatEd"),
      labels = c("no formal", "4th grade", "8th grade", "high school",
                 "two-year college", "college degree", "graduate degree")
    ),
    "Language Skills" = list(
      keys = paste0(c("1b", 2:4), ".FeatLang"),
      labels = c("fluent English", "broken English",
                 "tried English but unable", "used interpreter")
    ),
    # Germany Baseline
    "Origin" = list(
      keys = paste0(c("1b", 2:10), ".FeatCountry"),
      labels = c("Germany", "France", "Mexico", "Philippines", "Poland",
                 "India", "China", "Sudan", "Somalia", "Iraq")
    ),
    "Job" = list(
      keys = paste0(c("1b", 2:11), ".FeatJob"),
      labels = c("janitor", "waiter", "child care provider", "gardener",
                 "financial analyst", "construction worker", "teacher",
                 "computer programmer", "nurse", "research scientist",
                 "doctor")
    ),
    "Job Experience" = list(
      keys = paste0(c("1b", 2:4), ".FeatExp"),
      labels = c("none", "1-2 years", "3-5 years", "5+ years")
    ),
    "Job Plans" = list(
      keys = paste0(c(1:2, "3b", 4), ".FeatPlans"),
      labels = c("contract with employer", "interviews with employer",
                 "will look for work", "no plans to look for work")
    ),
    "Reason for Application" = list(
      keys = paste0(c("1b", 2:3), ".FeatReason"),
      labels = c("reunite with family", "seek better job",
                 "escape persecution")
    ),
    "Prior Entry" = list(
      keys = paste0(c("1b", 2:5), ".FeatTrips"),
      labels = c("never", "once as tourist", "many times as tourist",
                 "six months with family", "once w/o authorization")
    )
  )
  group_names <- names(attribute_specs)

  # prep estimates
  d$var <- rownames(d)
  colnames(d) <- c("pe", "se", "var")
  d$order <- seq_len(nrow(d))
  # compute CIs
  d$upper <- d$pe + 1.96 * d$se
  d$lower <- d$pe - 1.96 * d$se

  # define group
  d$group <- NA_character_
  for (group_name in group_names) {
    d$group[d$var %in% attribute_specs[[group_name]]$keys] <- group_name
  }

  # Fail early with a readable message: the header positions further down are
  # hard-coded for the complete set of levels, so partial or unknown input
  # cannot be laid out correctly.
  unmatched <- is.na(d$group)
  if (any(unmatched)) {
    stop("prepdata(): unrecognised row names: ",
         paste(d$var[unmatched], collapse = ", "), call. = FALSE)
  }
  for (group_name in group_names) {
    n_expected <- length(attribute_specs[[group_name]]$labels)
    n_found <- sum(d$group == group_name)
    if (n_found != n_expected) {
      stop("prepdata(): group '", group_name, "' has ", n_found,
           " rows but ", n_expected, " are required", call. = FALSE)
    }
  }

  # order groups by name rather than by their position in the input, so the
  # result does not depend on the attribute order of the input file
  d <- d[order(factor(d$group, levels = group_names)), ]
  d$order <- seq_len(nrow(d))

  # label attributes (indented so they sit under the group headers)
  offset <- "   "
  for (group_name in group_names) {
    d$var[d$group == group_name] <-
      paste(offset, attribute_specs[[group_name]]$labels)
  }

  # sub in group labels: headers sit at x.5 just before a group's first row,
  # blank spacers (each a distinct run of spaces, so they stay unique factor
  # levels) at x.1 just after the previous group's last row
  dd <- data.frame(
    var = c("Gender:",
            " ",
            "Education:",
            "  ",
            "Language:",
            "   ",
            "Origin:",
            "    ",
            "Profession:",
            "     ",
            "Job experience:",
            "      ",
            "Job plans:",
            "       ",
            "Application reason:",
            "        ",
            "Prior trips to U.S.:"),
    order = c(.5, 2.1, 2.5, 9.1, 9.5, 13.1, 13.5, 23.1, 23.5, 34.1, 34.5,
              38.1, 38.5, 42.1, 42.5, 45.1, 45.5),
    pe = 1, se = 1, upper = 1, lower = 1, group = NA,
    stringsAsFactors = FALSE
  )
  d <- rbind(d, dd)
  d <- d[order(d$order), ]

  # reversed levels: the first row becomes the last factor level
  d$var <- factor(d$var, levels = rev(unique(d$var)))
  d
}

# This file is produced by the original authors' Stata code. It is included
# so the figure can be replicated without redistributing their original data
# directly.
main.data <- read.table("data_from_hh/chosen.txt")
main.data <- prepdata(main.data)

# Focus on effect of country of origin. which() also drops the header/spacer
# rows added by prepdata(), whose group is NA.
country_eff <- main.data[which(main.data$group == "Origin"), ]

# Order the factor levels by row position so the plot axis follows the
# table order.
country_eff <- dplyr::mutate(
  country_eff,
  var = forcats::fct_reorder(.f = var, .x = order)
)

################ FIGURE 1 ##################
g_main <- ggplot(
  country_eff,
  aes(x = var, y = pe, ymin = lower, ymax = upper)
) +
  # Fixed reference line at zero: passed as a parameter rather than through
  # aes(), so a single line is drawn instead of one per data row.
  geom_hline(yintercept = 0, linetype = "dashed") +
  geom_point() +
  geom_errorbar() +
  coord_flip() +
  theme_bw(base_size = 15) +
  ylim(-0.2, 0.2) +
  xlab("Country of Origin") +
  ylab("Effect") +
  theme(legend.position = "none")

# Make sure the output directory exists; ggsave() fails on a missing folder.
fig_path <- "figures/hh_2015_pooled.pdf"
dir.create(dirname(fig_path), showWarnings = FALSE, recursive = TRUE)
ggsave(filename = fig_path, plot = g_main, width = 8.5, height = 4)

